library(tidyverse)
# ── Attaching core tidyverse packages ────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
# ✔ dplyr 1.1.4 ✔ readr 2.1.5
# ✔ forcats 1.0.0 ✔ stringr 1.5.1
# ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
# ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
# ✔ purrr 1.0.2
# ── Conflicts ──────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
# ✖ dplyr::filter() masks stats::filter()
# ✖ dplyr::lag() masks stats::lag()
# ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ratdat)
# Data exploration ----
# Open the dataset's help page, then print summary statistics for every column.
help("complete_old")
complete_old %>% summary()
# record_id month day year plot_id species_id sex
# Min. : 1 Min. : 1.000 Min. : 1.0 Min. :1977 Min. : 1.00 Length:16878 Length:16878
# 1st Qu.: 4220 1st Qu.: 3.000 1st Qu.: 9.0 1st Qu.:1981 1st Qu.: 5.00 Class :character Class :character
# Median : 8440 Median : 6.000 Median :15.0 Median :1983 Median :11.00 Mode :character Mode :character
# Mean : 8440 Mean : 6.382 Mean :15.6 Mean :1984 Mean :11.47
# 3rd Qu.:12659 3rd Qu.: 9.000 3rd Qu.:23.0 3rd Qu.:1987 3rd Qu.:17.00
# Max. :16878 Max. :12.000 Max. :31.0 Max. :1989 Max. :24.00
# hindfoot_length weight genus species taxa plot_type
# Min. : 6.00 Min. : 4.00 Length:16878 Length:16878 Length:16878 Length:16878
# 1st Qu.:21.00 1st Qu.: 24.00 Class :character Class :character Class :character Class :character
# Median :35.00 Median : 42.00 Mode :character Mode :character Mode :character Mode :character
# Mean :31.98 Mean : 53.22
# 3rd Qu.:37.00 3rd Qu.: 53.00
# Max. :70.00 Max. :278.00
# NA's :2733 NA's :1692
# Peek at the first rows, then list every column with its type.
complete_old %>% head()
complete_old %>% str()
# tibble [16,878 × 13] (S3: tbl_df/tbl/data.frame)
# $ record_id : int [1:16878] 1 2 3 4 5 6 7 8 9 10 ...
# $ month : int [1:16878] 7 7 7 7 7 7 7 7 7 7 ...
# $ day : int [1:16878] 16 16 16 16 16 16 16 16 16 16 ...
# $ year : int [1:16878] 1977 1977 1977 1977 1977 1977 1977 1977 1977 1977 ...
# $ plot_id : int [1:16878] 2 3 2 7 3 1 2 1 1 6 ...
# $ species_id : chr [1:16878] "NL" "NL" "DM" "DM" ...
# $ sex : chr [1:16878] "M" "M" "F" "M" ...
# $ hindfoot_length: int [1:16878] 32 33 37 36 35 14 NA 37 34 20 ...
# $ weight : int [1:16878] NA NA NA NA NA NA NA NA NA NA ...
# $ genus : chr [1:16878] "Neotoma" "Neotoma" "Dipodomys" "Dipodomys" ...
# $ species : chr [1:16878] "albigula" "albigula" "merriami" "merriami" ...
# $ taxa : chr [1:16878] "Rodent" "Rodent" "Rodent" "Rodent" ...
# $ plot_type : chr [1:16878] "Control" "Long-term Krat Exclosure" "Control" "Rodent Exclosure" ...
# ggplot2 ----
library(ggplot2)

# Hindfoot length against weight: one semi-transparent point per record,
# coloured by plot type.
complete_old %>%
  ggplot(aes(x = weight, y = hindfoot_length, color = plot_type)) +
  geom_point(alpha = 0.2)
# Remove records with a missing weight or a missing hindfoot length.
complete_old <- complete_old %>%
  filter(!is.na(weight), !is.na(hindfoot_length))
# Same scatter plot, additionally mapping sex to point shape, with the
# discrete viridis colour scale and a log10-transformed x axis.
complete_old %>%
  ggplot(
    aes(x = weight, y = hindfoot_length, color = plot_type, shape = sex)
  ) +
  geom_point(alpha = 0.2) +
  scale_color_viridis_d() +
  scale_x_log10()
# Boxplots of hindfoot length per plot type ----
# The x-axis labels are passed through label_wrap_gen(width = 10) in every
# plot of this series.

# 1. Plain boxplot.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_boxplot() +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 2. Overlay the individual records as jittered, mostly transparent points.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 3. Colour mapped in the global aesthetics: applies to boxes and points.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length, color = plot_type)) +
  geom_boxplot() +
  geom_jitter(alpha = 0.1) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 4. Colour mapped inside geom_jitter() only: applies to the points alone.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_boxplot() +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))
# 5. Hide the boxplot's own outlier markers (the jittered points already
#    show every record).
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 6. Swap the layer order so the boxes are drawn on top of the points.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  geom_boxplot(outlier.shape = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 7. Give the boxes no fill so the points underneath stay visible.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  geom_boxplot(outlier.shape = NA, fill = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10))

# 8. Violin variant, with fill mapped to plot type in the global aesthetics.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length, fill = plot_type)) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  geom_violin() +
  scale_x_discrete(labels = label_wrap_gen(width = 10))
# 9. Points under unfilled boxes, switched to the black-and-white theme.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  geom_boxplot(outlier.shape = NA, fill = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10)) +
  theme_bw()

# 10. Same plot with the legend removed and explicit axis titles.
complete_old %>%
  ggplot(aes(x = plot_type, y = hindfoot_length)) +
  geom_jitter(aes(color = plot_type), alpha = 0.1) +
  geom_boxplot(outlier.shape = NA, fill = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x = "Plot type", y = "Hindfoot length (mm)")
# Hindfoot length per plot type, one facet per sex laid out in a single row.
# The theme element must be spelled `legend.position`; a misspelled element
# name is not recognised by theme(), so the legend would stay visible.
ggplot(data = complete_old, mapping = aes(x = plot_type, y = hindfoot_length)) +
  geom_jitter(alpha = 0.1, aes(color = plot_type)) +
  geom_boxplot(outlier.shape = NA, fill = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x = "Plot type", y = "Hindfoot length (mm)") +
  facet_wrap(vars(sex), nrow = 1)
# Final figure: hindfoot length per plot type, one facet per sex stacked in a
# single column. The plot is stored in `plot_final` so that ggsave() below
# receives an existing object, and the theme element is spelled
# `legend.position` so the legend is actually removed.
plot_final <- ggplot(
  data = complete_old,
  mapping = aes(x = plot_type, y = hindfoot_length)
) +
  geom_jitter(alpha = 0.1, aes(color = plot_type)) +
  geom_boxplot(outlier.shape = NA, fill = NA) +
  scale_x_discrete(labels = label_wrap_gen(width = 10)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(x = "Plot type", y = "Hindfoot length (mm)") +
  facet_wrap(vars(sex), ncol = 1)

# Display the figure (an assignment alone does not print it).
plot_final

# Write the figure to disk.
# NOTE(review): assumes a Figures/ directory exists under the working
# directory - confirm.
ggsave(
  filename = "Figures/plot_final.png",
  plot = plot_final,
  height = 6,
  width = 8
)
# (console output from an earlier run) Warning: The `lengend.position` theme element is not defined in the element hierarchy.
# tidyverse data manipulation ----
# Read the survey records from CSV into a tibble.
surveys <- "../Data/raw/surveys_complete_77_89.csv" %>%
  read_csv()
# Rows: 16878 Columns: 13
# ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────
# Delimiter: ","
# chr (6): species_id, sex, genus, species, taxa, plot_type
# dbl (7): record_id, month, day, year, plot_id, hindfoot_length, weight
# ℹ Use `spec()` to retrieve the full column specification for this data.
# ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# dplyr verbs used in the sections below:
#   select(), filter(), mutate(), group_by(), summarize()

## select ----
surveys %>% select(plot_id, species_id)   # columns by name
surveys %>% select(c(3, 4))               # columns by position
surveys %>% select(-plot_id)              # every column except plot_id
surveys %>% select(where(is.numeric))     # numeric columns only
surveys %>% select(where(anyNA))          # columns containing at least one NA
## filter ----
# Rows from a single year.
surveys %>% filter(year == 1988)
# Rows whose species code is one of a set.
surveys %>% filter(species_id %in% c("RM", "DO"))
# Both conditions at once (comma-separated conditions are combined with AND).
surveys %>% filter(year == 1988, species_id %in% c("RM", "DO"))
# Challenge 1: records from 1980 to 1985 (inclusive), keeping only the
# year, month, plot_id and species_id columns.

# Approach 1: intermediate object
surveys_80_85 <- filter(surveys, year >= 1980 & year <= 1985)
surveys_80_85 <- select(surveys_80_85, year, month, plot_id, species_id)
surveys_80_85

# Approach 2: nested calls
select(
  filter(surveys, year >= 1980 & year <= 1985),
  year, month, plot_id, species_id
)

## Pipelines with %>% ----
# Approach 3: pipeline
# `%in%` tests membership in the range. `year == 1980:1985` would instead
# recycle the six values down the rows and compare element by element,
# silently dropping most of the matching records.
surveys %>%
  filter(year %in% 1980:1985) %>%
  select(year, month, plot_id, species_id)
# Challenge 2: records from 1988, keeping record_id, month and species_id.
surveys %>%
  filter(year == 1988) %>%
  select(record_id, month, species_id)
## mutate ----
# Weight converted to kilograms, placed right after record_id.
# The conversion divides by 1000 (grams per kilogram), not 100.
# NOTE(review): assumes `weight` is recorded in grams - confirm against the
# dataset documentation.
surveys %>%
  mutate(weight_kg = weight / 1000) %>%
  relocate(weight_kg, .after = record_id)

# A later column can use one created earlier in the same mutate() call.
surveys %>%
  mutate(weight_kg = weight / 1000, weight_lbs = weight_kg * 2.2) %>%
  relocate(weight_lbs, .after = record_id)

# Move both new columns; the result order is record_id, weight_kg, weight_lbs.
surveys %>%
  mutate(weight_kg = weight / 1000, weight_lbs = weight_kg * 2.2) %>%
  relocate(weight_lbs, .after = record_id) %>%
  relocate(weight_kg, .after = record_id)

# Same, after dropping the records with a missing weight.
surveys %>%
  filter(!is.na(weight)) %>%
  mutate(weight_kg = weight / 1000, weight_lbs = weight_kg * 2.2) %>%
  relocate(weight_lbs, .after = record_id) %>%
  relocate(weight_kg, .after = record_id)

# Date assembled as a "year-month-day" character string.
surveys %>%
  mutate(date = paste(year, month, day, sep = "-")) %>%
  relocate(date, .after = year)

# Same string parsed into a Date with lubridate::ymd().
library(lubridate)
surveys %>%
  mutate(date = ymd(paste(year, month, day, sep = "-"))) %>%
  relocate(date, .after = year)
## group_by & summarize ----
# Mean weight per sex. Without na.rm, mean() returns NA for every group that
# contains a missing weight.
surveys %>%
  group_by(sex) %>%
  summarize(mean.weight = mean(weight))

# Same summary ignoring missing weights, plus the number of records per group.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
surveys %>%
  group_by(sex) %>%
  summarize(mean.weight = mean(weight, na.rm = TRUE), count = n())
# Challenge 3: number of records per sex and date, drawn as one line per sex.
# Records with a missing sex are excluded. `.groups = "drop"` returns an
# ungrouped summary, which also silences the "grouped output" message from
# summarize().
surveys %>%
  mutate(date = ymd(paste(year, month, day, sep = "-"))) %>%
  filter(!is.na(sex)) %>%
  group_by(sex, date) %>%
  summarize(count = n(), .groups = "drop") %>%
  ggplot(aes(x = date, y = count, color = sex)) +
  geom_line() +
  theme_bw()
# (console output from an earlier run) `summarise()` has grouped output by 'sex'. You can override using the `.groups` argument.
NA
NA